“Bad programmers worry about the code. Good programmers worry about data structures and their relationships.”
| Measurements |
|---|
| 1. Mean of the integrated profile |
| 2. Standard deviation of the integrated profile |
| 3. Excess kurtosis of the integrated profile |
| 4. Skewness of the integrated profile |
| 5. Mean of the DM-SNR curve |
| 6. Standard deviation of the DM-SNR curve |
| 7. Excess kurtosis of the DM-SNR curve |
| 8. Skewness of the DM-SNR curve |
| 9. True or false pulsar (human-verified) |
Downloaded from: https://archive.ics.uci.edu/ml/machine-learning-databases/00372/HTRU2.zip
# Load the raw HTRU2 measurements into a tibble.
# col_names = FALSE tells read_csv() not to treat the first row as a header,
# so the columns receive placeholder names until they are renamed.
HTRU2 <- read_csv(
  here("Data_Analyses_MATH_208/Datasets/HTRU2/HTRU_2.csv"),
  col_names = FALSE
)
# Give the nine columns descriptive names, in the same order as the
# Measurements table: four integrated-profile (IP) statistics, four DM-SNR
# curve statistics, then the pulsar class label.
names(HTRU2) <- c(
  "Mean_IP", "SD_IP", "EK_IP", "SKW_IP",
  "Mean_DMSNR", "SD_DMSNR", "EK_DMSNR", "SKW_DMSNR",
  "Class"
)
[1] "spec_tbl_df" "tbl_df" "tbl" "data.frame"
[1] "numeric"
[1] 52
[1] 53
[1] 1 2 3 NA 5
# A character vector of five author names.
author_list <- c(
  "J.K. Rowling", "Stephen King", "Michael Lewis",
  "Toni Morrison", "David McCullough"
)
mode(author_list)
[1] "character"
[1] "logical"
[1] 53 51 60 64 69 74 78 84 86 96 104 112 118 125 132 135
[1] 54 52 61 65 70 75 79 85 87 97 105 113 119 126 133 136
[1] 1 2 3 4 5 6 7 8 9
[1] 20.86614 20.07874 23.62205 25.19685 27.16535 29.13386 30.70866
[8] 33.07087 33.85827 37.79528 40.94488 44.09449 46.45669 49.21260
[15] 51.96850 53.14961
[1] 20.86614 20.07874 23.62205 25.19685 27.16535 29.13386 30.70866
[8] 33.07087 33.85827 37.79528 40.94488 44.09449 46.45669 49.21260
[15] 51.96850 53.14961
[1] 4 7 6 9
| Function name | Argument | Action |
|---|---|---|
| c | Vector elements | Creates vector |
| rep | times/each/length.out | Replicates vector |
| seq.int | from/to/by/length.out/along.with | Creates sequence of integers |
| is.vector | Vector/mode | Returns TRUE if atomic vector |
[1] NA
[1] 8
[1] 18
function (x, ...)
NULL
function (x, trim = 0, na.rm = FALSE, ...)
NULL
[1] mean,ANY-method mean,Matrix-method
[3] mean,sparseMatrix-method mean,sparseVector-method
[5] mean.Date mean.default
[7] mean.difftime mean.IDate*
[9] mean.POSIXct mean.POSIXlt
[11] mean.quosure*
see '?methods' for accessing help and source code
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
[1] "matrix"
[1] "numeric"
$dim
[1] 3 3
[1] 3 3
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
[1] 2
[1] 4
[,1] [,2] [,3]
[1,] 66 78 90
[2,] 78 93 108
[3,] 90 108 126
[,1] [,2] [,3]
[1,] 1 8 21
[2,] 8 25 48
[3,] 21 48 81
# Course numbers grouped by year of study; the vectors have different lengths.
U1 <- c(203, 204)
U2 <- c(323, 324, 447)
U3 <- c(208, 427, 423, 523, 545)

# A list can hold elements of different lengths and types:
# three numeric vectors followed by a single character string.
mymcgill_stats <- list(U1, U2, U3, "Statistics Major")

# Double brackets extract the first element itself (the U1 vector).
mymcgill_stats[[1]]
[1] 203 204
[[2]]
[1] 323 324 447
[[3]]
[1] 208 427 423 523 545
[[4]]
[1] "Statistics Major"
$U1
[1] 203 204
$U2
[1] 323 324 447
$U3
[1] 208 427 423 523 545
$Major
[1] "Statistics Major"
[1] 323 324 447
[1] 323 324 447
[1] 323 324 447
$U2
[1] 323 324 447
$U2
[1] 323 324 447
$U3
[1] 208 427 423 523 545
$U1
[1] 203 204
$U3
[1] 208 427 423 523 545
[1] 204
[1] 204
[1] 204
[1] 204
# Read the same CSV with base R's read.csv(), which returns a plain data.frame.
# header = FALSE keeps the first row as data rather than as column names.
htru2_df <- read.csv(
  here("Data_Analyses_MATH_208/Datasets/HTRU2/HTRU_2.csv"),
  header = FALSE
)
class(htru2_df)
[1] "data.frame"
V1 V2 V3 V4 V5 V6 V7
1 140.56250 55.68378 -0.23457141 -0.6996484 3.199833 19.11043 7.975532
2 102.50781 58.88243 0.46531815 -0.5150879 1.677258 14.86015 10.576487
3 103.01562 39.34165 0.32332837 1.0511644 3.121237 21.74467 7.735822
4 136.75000 57.17845 -0.06841464 -0.6362384 3.642977 20.95928 6.896499
5 88.72656 40.67223 0.60086608 1.1234917 1.178930 11.46872 14.269573
6 93.57031 46.69811 0.53190485 0.4167211 1.636288 14.54507 10.621748
V8 V9
1 74.24222 0
2 127.39358 0
3 63.17191 0
4 53.59366 0
5 252.56731 0
6 131.39400 0
library(tidyverse)
# Read the CSV with readr's read_csv(), which returns a tibble.
# col_names = FALSE keeps the first row as data rather than as column names.
htru2_tbl <- read_csv(
  here("Data_Analyses_MATH_208/Datasets/HTRU2/HTRU_2.csv"),
  col_names = FALSE
)
class(htru2_tbl)
[1] "spec_tbl_df" "tbl_df" "tbl" "data.frame"
# A tibble: 17,898 x 9
X1 X2 X3 X4 X5 X6 X7 X8 X9
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 141. 55.7 -0.235 -0.700 3.20 19.1 7.98 74.2 0
2 103. 58.9 0.465 -0.515 1.68 14.9 10.6 127. 0
3 103. 39.3 0.323 1.05 3.12 21.7 7.74 63.2 0
4 137. 57.2 -0.0684 -0.636 3.64 21.0 6.90 53.6 0
5 88.7 40.7 0.601 1.12 1.18 11.5 14.3 253. 0
6 93.6 46.7 0.532 0.417 1.64 14.5 10.6 131. 0
7 119. 48.8 0.0315 -0.112 0.999 9.28 19.2 480. 0
8 130. 39.8 -0.158 0.390 1.22 14.4 13.5 198. 0
9 107. 52.6 0.453 0.170 2.33 14.5 9.00 108. 0
10 107. 39.5 0.466 1.16 4.08 25.0 7.40 57.8 0
# … with 17,888 more rows
X1 X2 X3 X4 X5 X6 X7
1 140.56250 55.68378 -0.23457141 -0.6996484 3.199833 19.11043 7.975532
2 102.50781 58.88243 0.46531815 -0.5150879 1.677258 14.86015 10.576487
3 103.01562 39.34165 0.32332837 1.0511644 3.121237 21.74467 7.735822
4 136.75000 57.17845 -0.06841464 -0.6362384 3.642977 20.95928 6.896499
5 88.72656 40.67223 0.60086608 1.1234917 1.178930 11.46872 14.269573
6 93.57031 46.69811 0.53190485 0.4167211 1.636288 14.54507 10.621748
X8 X9
1 74.24222 0
2 127.39358 0
3 63.17191 0
4 53.59366 0
5 252.56731 0
6 131.39400 0
# Build a three-row tibble with one row per year of study.
# Courses is a list-column, so each row can hold a course vector of a
# different length; Year and Major are ordinary character columns.
mymcgill_stats_tbl <- tibble(
  Courses = list(U1 = U1, U2 = U2, U3 = U3),
  Year = c("U1", "U2", "U3"),
  Major = rep("Statistics Major", 3)
)
mymcgill_stats_tbl
# A tibble: 3 x 3
Courses Year Major
<list> <chr> <chr>
1 <dbl [2]> U1 Statistics Major
2 <dbl [3]> U2 Statistics Major
3 <dbl [5]> U3 Statistics Major
[1] "names" "class" "row.names" "spec"
[1] "X1" "X2" "X3" "X4" "X5" "X6" "X7" "X8" "X9"
# Replace the placeholder column names with descriptive ones:
# four integrated-profile (IP) statistics, four DM-SNR curve statistics,
# then the pulsar class label.
names(htru2_tbl) <- c(
  "Mean_IP", "SD_IP", "EK_IP", "SKW_IP",
  "Mean_DMSNR", "SD_DMSNR", "EK_DMSNR", "SKW_DMSNR",
  "Class"
)
htru2_tbl
# A tibble: 17,898 x 9
Mean_IP SD_IP EK_IP SKW_IP Mean_DMSNR SD_DMSNR EK_DMSNR SKW_DMSNR
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 141. 55.7 -0.235 -0.700 3.20 19.1 7.98 74.2
2 103. 58.9 0.465 -0.515 1.68 14.9 10.6 127.
3 103. 39.3 0.323 1.05 3.12 21.7 7.74 63.2
4 137. 57.2 -0.0684 -0.636 3.64 21.0 6.90 53.6
5 88.7 40.7 0.601 1.12 1.18 11.5 14.3 253.
6 93.6 46.7 0.532 0.417 1.64 14.5 10.6 131.
7 119. 48.8 0.0315 -0.112 0.999 9.28 19.2 480.
8 130. 39.8 -0.158 0.390 1.22 14.4 13.5 198.
9 107. 52.6 0.453 0.170 2.33 14.5 9.00 108.
10 107. 39.5 0.466 1.16 4.08 25.0 7.40 57.8
# … with 17,888 more rows, and 1 more variable: Class <dbl>